library(AER)
## Zorunlu paket yükleniyor: car
## Zorunlu paket yükleniyor: carData
## Zorunlu paket yükleniyor: lmtest
## Zorunlu paket yükleniyor: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Zorunlu paket yükleniyor: sandwich
## Zorunlu paket yükleniyor: survival
library(TSA)
##
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.Arima TSA
## plot.Arima TSA
library(tseries)
library(pdR)
library(gridExtra)
library(caschrono)
library(datasets)
library(readxl)
library(anomalize)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ✖ readr::spec() masks TSA::spec()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(caschrono)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
Data Import
# -- Data import --------------------------------------------------------------
# NOTE(review): setwd() with an absolute user-specific path makes the script
# non-portable; prefer project-relative paths when sharing.
setwd("C:/Users/kagan/Desktop")
# Suppress scientific notation in printed output.
options(scipen = 999)
# NOTE(review): the file name begins with a Turkish dotless i ("ımport.xls");
# presumably this matches the actual file on disk -- confirm.
df <- read_excel("ımport.xls")
# Monthly series, Jan 1989 .. Sep 2023 (417 observations).
data <- ts(df$obs,start = c(1989,1),end = c(2023,9),frequency = 12)
# Parse the date column into Date objects for plotting/joining.
df$date <- as.Date(df$date,format="%Y-%m-%d")
Time Series Plot and Descriptives
# Quick look at both ends of the data frame (first and last six rows).
cbind(head(df)," ",tail(df))
## date obs " " date obs
## 1 1989-01-01 37513.2 2023-04-01 260996.7
## 2 1989-02-01 38561.7 2023-05-01 254134.3
## 3 1989-03-01 39724.7 2023-06-01 251039.7
## 4 1989-04-01 38664.7 2023-07-01 256218.4
## 5 1989-05-01 40909.6 2023-08-01 253667.5
## 6 1989-06-01 39780.9 2023-09-01 260612.7
# No missing values in the series.
sum(is.na(data))
## [1] 0
summary(data)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 37513 73623 139189 135539 189809 289568
sd(data)
## [1] 66574
head(df)
## # A tibble: 6 × 2
## date obs
## <date> <dbl>
## 1 1989-01-01 37513.
## 2 1989-02-01 38562.
## 3 1989-03-01 39725.
## 4 1989-04-01 38665.
## 5 1989-05-01 40910.
## 6 1989-06-01 39781.
tail(data)
## Apr May Jun Jul Aug Sep
## 2023 260996.7 254134.3 251039.7 256218.4 253667.5 260612.7
options(scipen = 999)
# Interactive time-series plot (ggplot converted to plotly).
# NOTE(review): "Millons" in the y-axis label is a typo for "Millions"
# (runtime string, left unchanged here).
ts_plot <- df %>%
ggplot( aes(x=date, y=obs)) +
geom_area(fill="grey", alpha=0.5) +
geom_line(color="grey") +
labs(title="Time Series Plot of U.S. Imports of \nGoods by Customs Basis from World",
x="Year",y="Millons of Dollars")+
theme_minimal()
ts_plot <- ggplotly(ts_plot)
ts_plot
Descriptive Plots
# Derive year and month columns from the date for grouped descriptive plots.
data2<- df %>% dplyr::mutate(year = lubridate::year(date), month = lubridate::month(date))
# Drop the original date column (column 1); keep obs / year / month.
data2 <- data2[,-1]
head(data2)
## # A tibble: 6 × 3
## obs year month
## <dbl> <dbl> <dbl>
## 1 37513. 1989 1
## 2 38562. 1989 2
## 3 39725. 1989 3
## 4 38665. 1989 4
## 5 40910. 1989 5
## 6 39781. 1989 6
# Factors are needed so ggplot treats year/month as discrete grouping axes.
data2$year <- as.factor(data2$year)
data2$month <- as.factor(data2$month)
# Boxplots of imports by calendar month (seasonality check).
# Fix: guides(fill = FALSE) is deprecated since ggplot2 3.3.4 (it triggered a
# lifecycle warning) -- "none" is the supported way to drop the legend.
# Also fixed the "Millons" typo in the axis label.
bp_month <- ggplot(data2,aes(x=month,y=obs,fill=month))+
geom_boxplot()+
labs(title="Boxplot Across Months",x="Months",y="Millions of Dollars")+
theme_minimal()+
guides(fill = "none")
bp_month

# Boxplots of imports by year; x labels thinned to every 4th year.
# Fix: guides(fill = FALSE) is deprecated (ggplot2 >= 3.3.4) -- use "none";
# "Millons" typo corrected in the axis label.
bp_year <- ggplot(data2,aes(x=year,y=obs,fill=year))+
geom_boxplot()+
labs(title="Boxplot Across Years",x="Years",y="Millions of Dollars")+
guides(fill = "none")+
scale_x_discrete(breaks = seq(1989, 2023, 4), labels = seq(1989, 2023, 4))
bp_year

# One line per year across months -- a quick visual seasonality check.
# Fix: guides(colour = FALSE) is deprecated (ggplot2 >= 3.3.4) -- use "none";
# "Millons" typo corrected in the axis label.
seasonal_check <- ggplot(data2, aes(as.numeric(as.character(month)), obs))+
geom_line(aes(colour = year))+
labs(title="Series by Years",x="",y="Millions of Dollars")+
guides(colour = "none")
seasonal_check

# Stack the three descriptive plots in one column.
grid.arrange(bp_month,bp_year,seasonal_check,ncol=1)

Cross-Validation
# Hold out the final 12 months as the test set; everything earlier is training.
n_test <- 12
train <- head(data, length(data) - n_test)
test <- tail(data, n_test)
length(test)
## [1] 12
Anomaly Detection and Removing
# Anomaly detection works on the training portion only (last 12 rows excluded,
# mirroring the train/test split above).
train_for_anomaly <- head(df,-12)
class(train_for_anomaly)
## [1] "tbl_df" "tbl" "data.frame"
# STL decomposition of obs, then GESD anomaly detection on the remainder
# (alpha = 0.05, at most 20% of points flagged); plot the decomposition.
train_for_anomaly%>%
time_decompose(obs, method = "stl", frequency = "auto", trend = "auto") %>%
anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.2) %>%
plot_anomaly_decomposition()
## Converting from tbl_df to tbl_time.
## Auto-index message: index = date
## frequency = 12 months
## trend = 60 months

# Recompose the series and highlight the detected anomalies on the original scale.
train_for_anomaly %>%
time_decompose(obs) %>%
anomalize(remainder) %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)
## Converting from tbl_df to tbl_time.
## Auto-index message: index = date
## frequency = 12 months
## trend = 60 months

library(knitr)
# Full anomalize pipeline: decompose, guard against negative observed values,
# flag anomalies, and replace them with cleaned (interpolated) values.
anomalized_set <- train_for_anomaly %>%
# 1. Decompose download counts and anomalize the STL decomposition remainder
time_decompose(obs) %>%
# 2. Fix negative values if any in observed
mutate(observed = ifelse(observed < 0, 0, observed)) %>%
# 3. Identify anomalies
anomalize(remainder) %>%
# 4. Clean & repair anomalous data
clean_anomalies()
## Converting from tbl_df to tbl_time.
## Auto-index message: index = date
## frequency = 12 months
## trend = 60 months
anomalized_set
## # A time tibble: 405 × 9
## # Index: date
## date observed season trend remainder remainder_l1 remainder_l2
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1989-01-01 37513. -336. 38814. -965. -18189. 18082.
## 2 1989-02-01 38562. -1054. 38915. 701. -18189. 18082.
## 3 1989-03-01 39725. 655. 39016. 53.5 -18189. 18082.
## 4 1989-04-01 38665. 89.9 39117. -542. -18189. 18082.
## 5 1989-05-01 40910. 293. 39217. 1399. -18189. 18082.
## 6 1989-06-01 39781. 342. 39318. 121. -18189. 18082.
## 7 1989-07-01 38951. -461. 39419. -6.54 -18189. 18082.
## 8 1989-08-01 40117. -78.5 39525. 671. -18189. 18082.
## 9 1989-09-01 39233. 68.0 39631. -466. -18189. 18082.
## 10 1989-10-01 40961. 357. 39736. 868. -18189. 18082.
## # ℹ 395 more rows
## # ℹ 2 more variables: anomaly <chr>, observed_cleaned <dbl>
# Show the first flagged anomalies with their cleaned replacements.
# (The table below is a kable markdown table whose cells were split across
# lines during document extraction.)
anomalized_set %>%
filter(anomaly == "Yes") %>%
select(date, anomaly, observed, observed_cleaned) %>%
head() %>%
kable()
| 2008-02-01 |
Yes |
179496 |
160304.7 |
| 2008-04-01 |
Yes |
182776 |
160506.7 |
| 2008-05-01 |
Yes |
183767 |
160238.9 |
| 2008-06-01 |
Yes |
186963 |
159816.6 |
| 2008-07-01 |
Yes |
194334 |
158542.2 |
| 2008-08-01 |
Yes |
186261 |
159080.6 |
# Rebuild a plain data frame of anomaly-cleaned observations.
clean <- data.frame(date=anomalized_set$date,observations=anomalized_set$observed_cleaned)
class(clean)
## [1] "data.frame"
# 405 monthly observations: Jan 1989 .. Sep 2022 (matches the training span).
clean_ts <- ts(clean$observations,start = c(1989,1),end = c(2022,9),frequency = 12)
# Cleaned series in black with the original training series overlaid in red,
# so the effect of anomaly repair is visible.
autoplot(clean_ts,col="black",ylab="Millions of Dollars",xlab="Date",
main="Anomaly Cleaned Time Series Plot",lwd=1)+theme_minimal()+
autolayer(train,color="red",lwd=1)

Trend Determination
par(mfrow=c(1,2))
# NOTE(review): `trans` is never defined in this script as shown -- presumably
# it is a (Box-Cox or log) transformation of the cleaned/training series
# created in a chunk that is missing here. Confirm before re-running.
# ACF decays slowly in the output below, consistent with a non-stationary series.
acf(as.vector(trans),main="ACF of Transformed TS")
pacf(as.vector(trans),main="PACF of Transformed TS")

# KPSS tests: null hypothesis is stationarity, so small p-values reject it.
#kpss test
kpss.test(trans,null="Level") #it says that series is not stationary
## Warning in kpss.test(trans, null = "Level"): p-value smaller than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: trans
## KPSS Level = 6.603, Truncation lag parameter = 5, p-value = 0.01
kpss.test(trans,null="Trend") # it says that we have stochastic trend
## Warning in kpss.test(trans, null = "Trend"): p-value smaller than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: trans
## KPSS Trend = 1.3816, Truncation lag parameter = 5, p-value = 0.01
# ADF tests: null hypothesis is a unit root (non-stationarity).
# The nonzero mean motivates including a constant ("c") in the regression.
# adf test
mean(trans)
## [1] 34.20278
library(fUnitRoots)
# With constant: p = 0.53 -> cannot reject the unit root.
adfTest(trans,lags=1,type = c("c"))
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## Dickey-Fuller: -1.4309
## P VALUE:
## 0.5251
##
## Description:
## Sun Jan 21 21:28:39 2024 by user: kagan
# With constant and trend: p = 0.90 -> still cannot reject the unit root.
adfTest(trans,lags=1,type = c("ct"))
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## Dickey-Fuller: -1.2314
## P VALUE:
## 0.9008
##
## Description:
## Sun Jan 21 21:28:39 2024 by user: kagan
Detrending
# One regular difference is suggested; no seasonal difference needed.
ndiffs(trans)
## [1] 1
nsdiffs(trans)
## [1] 0
# NOTE(review): this assignment shadows base::diff with a data object. It works
# (the RHS call resolves before the assignment), but a distinct name such as
# `dtrans` would be safer.
diff <- diff(trans)
# KPSS on the differenced series: p-value at the 0.1 cap -> stationary.
kpss.test(diff,null=c("Level"))
## Warning in kpss.test(diff, null = c("Level")): p-value greater than printed
## p-value
##
## KPSS Test for Level Stationarity
##
## data: diff
## KPSS Level = 0.2168, Truncation lag parameter = 5, p-value = 0.1
# Mean near zero, so the ADF regression without constant ("nc") is appropriate.
mean(diff)
## [1] 0.02922785
# ADF strongly rejects the unit root after differencing.
adfTest(diff,lags=1,type = "nc")
## Warning in adfTest(diff, lags = 1, type = "nc"): p-value smaller than printed
## p-value
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## Dickey-Fuller: -16.445
## P VALUE:
## 0.01
##
## Description:
## Sun Jan 21 21:28:39 2024 by user: kagan
# Differenced TS Plot
autoplot(diff,col="darkblue",main="Differenced Time Series Plot")+theme_minimal()

Model Suggestion
# Candidate order identification from the differenced series.
par(mfrow=c(1,2))
acf(as.vector(diff),main="ACF of Differenced TS")
pacf(as.vector(diff),main="PACF of Differenced TS")

# Orders read off the ACF/PACF plots above:
# ARIMA(3,1,0)
# ARIMA(3,1,3)
# ARIMA(3,1,1)
# armaselect ranks (p, q) pairs by SBC/BIC; best is (0, 1).
armaselect(diff)
## p q sbc
## [1,] 0 1 -1517.819
## [2,] 0 3 -1515.804
## [3,] 3 1 -1515.062
## [4,] 1 1 -1514.228
## [5,] 1 3 -1513.082
## [6,] 0 2 -1512.495
## [7,] 0 4 -1511.753
## [8,] 2 2 -1510.896
## [9,] 2 1 -1510.749
## [10,] 4 1 -1509.525
# ARIMA(0,1,1)
# Extended ACF: the top-left "o" corner suggests MA(3) on the differences.
eacf(diff)
## AR/MA
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x o o o o o o o o o x o
## 1 x o x o o o o o o o o o x o
## 2 x x x o o o o o o o o o x o
## 3 x o x o o o o o o o o o o o
## 4 x o o x o o o o o o o o o o
## 5 x o o x x o o o o o o o o o
## 6 x x x x o x o o o o o o x o
## 7 o o x x o x o o o o o o o o
# ARIMA(0,1,3)
# NOTE(review): auto.arima is run on the untransformed `train` series, while
# the candidate models below are fitted to `trans` -- presumably intentional
# for order selection only, but worth confirming.
auto.arima(train)
## Series: train
## ARIMA(0,1,3) with drift
##
## Coefficients:
## ma1 ma2 ma3 drift
## 0.0448 0.1494 0.1919 571.1675
## s.e. 0.0498 0.0514 0.0479 262.4084
##
## sigma^2 = 14672816: log likelihood = -3904.62
## AIC=7819.25 AICc=7819.4 BIC=7839.25
# ARIMA(0,1,3)
# ARIMA(0,1,3)
# Fit each candidate model to the transformed series with CSS-ML estimation,
# then compare by BIC. "#significant" marks models whose coefficients are
# significant (|estimate| > ~2 s.e.).
fit1<-Arima(trans,order=c(3,1,0),method = "CSS-ML") #model suggested by us
fit1
## Series: trans
## ARIMA(3,1,0)
##
## Coefficients:
## ar1 ar2 ar3
## -0.2686 0.0307 0.2136
## s.e. 0.0486 0.0504 0.0486
##
## sigma^2 = 0.02414: log likelihood = 180.36
## AIC=-352.72 AICc=-352.62 BIC=-336.71
#significant
fit2<-Arima(trans,order=c(3,1,3),method = "CSS-ML") #model suggested by us
fit2
## Series: trans
## ARIMA(3,1,3)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## 0.7058 -0.2725 0.5655 -1.0074 0.5887 -0.5696
## s.e. 0.1503 0.2146 0.1234 0.1316 0.2448 0.1575
##
## sigma^2 = 0.02335: log likelihood = 187.99
## AIC=-361.97 AICc=-361.69 BIC=-333.96
#significant
fit3<-Arima(trans,order=c(3,1,1),method = "CSS-ML") #model suggested by us
fit3
## Series: trans
## ARIMA(3,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ma1
## 0.4638 0.2166 0.1779 -0.7793
## s.e. 0.1195 0.0571 0.0566 0.1169
##
## sigma^2 = 0.02389: log likelihood = 182.96
## AIC=-355.93 AICc=-355.78 BIC=-335.92
#significant
fit4<-Arima(trans,order=c(0,1,1),method = "CSS-ML") #model suggested by armaselect
fit4
## Series: trans
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.2313
## s.e. 0.0414
##
## sigma^2 = 0.0254: log likelihood = 169.2
## AIC=-334.4 AICc=-334.37 BIC=-326.39
#significant
fit5<-Arima(trans,order=c(0,1,3),method = "CSS-ML") #model suggested by eacf and autoarima
fit5
## Series: trans
## ARIMA(0,1,3)
##
## Coefficients:
## ma1 ma2 ma3
## -0.2744 0.0880 0.1480
## s.e. 0.0503 0.0554 0.0486
##
## sigma^2 = 0.02434: log likelihood = 178.73
## AIC=-349.47 AICc=-349.36 BIC=-333.46
#significant
# Smallest (most negative) BIC across the five fits identifies the winner.
(min(cbind(fit1$bic,fit2$bic,fit3$bic,fit4$bic,fit5$bic)))
## [1] -336.7123
# fit1 which is ARIMA(3,1,0) has the lowest BIC value so we will choose it.
Diagnostic Checking
# Normality of Residuals
# Standardized residuals of the chosen ARIMA(3,1,0) model.
r <- rstandard(fit1)
head(r)
## Jan Feb Mar Apr May Jun
## 1989 0.1766732 0.9021252 1.2636755 -0.6185974 1.4861713 -0.6438361
# Time plot, QQ plot, and histogram of the residuals, stacked in one column.
r1 <- autoplot(r)+geom_line(y=0)+theme_minimal()+ggtitle("Plot of The Residuals")
r2 <- ggplot(r, aes(sample = r)) +stat_qq()+geom_qq_line()+ggtitle("QQ Plot of the Residuals")+theme_minimal()
r3 <- ggplot(r,aes(x=r))+geom_histogram(bins=20)+geom_density()+ggtitle("Histogram of Residuals")+theme_minimal()
grid.arrange(r1,r2,r3,ncol=1)
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
# Formal normality tests on the residuals; both reject normality (p ~ 0).
jarque.bera.test(r)
##
## Jarque Bera Test
##
## data: r
## X-squared = 1755.3, df = 2, p-value < 0.00000000000000022
shapiro.test(r)
##
## Shapiro-Wilk normality test
##
## data: r
## W = 0.89013, p-value < 0.00000000000000022
# Detection of Serial Correlation
# Residual ACF/PACF out to 72 lags (as.vector strips the ts time index so
# lags are plotted as integers).
# Fix: ggAcf()/ggPacf() take `lag.max` (the original `lag = 72` relied on
# partial matching) and have no `main` argument -- it was silently ignored with
# a warning, so the plots had no titles. Titles are added with ggtitle(),
# consistent with the squared-residual plots later in the script.
g1 <- ggAcf(as.vector(r), lag.max = 72) + theme_minimal() + ggtitle("ACF of the Residuals")
g2 <- ggPacf(as.vector(r), lag.max = 72) + theme_minimal() + ggtitle("PACF of the Residuals")
grid.arrange(g1,g2,ncol=2)

#Breusch- Godfrey Test
library(lmtest)
# Auxiliary regression of the residuals on their first lag, then BG test for
# serial correlation up to order 15.
z <- lm(r~1+zlag(r))
bgtest(z,order=15)
##
## Breusch-Godfrey test for serial correlation of order up to 15
##
## data: z
## LM test = 19.927, df = 15, p-value = 0.1747
# Since p value is greater than alpha, we have 95% confident that the residuals of the
# model are uncorrelated,according to results of Breusch-Godfrey Test.
#Box-Ljung Test
Box.test(r,lag=15,type=c("Ljung-Box"))
##
## Box-Ljung test
##
## data: r
## X-squared = 20.669, df = 15, p-value = 0.1477
# Since p value is greater than alpha, we have 95% confident that the
# residuals of the model are uncorrelated, according to results of Box-Ljung Test.
# Heteroscedasticity
# ACF/PACF of SQUARED residuals: significant spikes would indicate ARCH effects.
r_sq <- r^2
g1_sq<-ggAcf(as.vector(r_sq),lag.max = 72)+theme_minimal()+ggtitle("ACF of Squared Residuals")
g2_sq<-ggPacf(as.vector(r_sq),lag.max = 72)+theme_minimal()+ggtitle("PACF of Squared Residuals") # heteroscedasticity check
grid.arrange(g1_sq,g2_sq,ncol=2)

library(FinTS)
##
## Attaching package: 'FinTS'
##
## The following object is masked from 'package:forecast':
##
## Acf
# Engle's ARCH LM test: tiny p-value rejects "no ARCH effects", so the
# residual variance is conditionally heteroscedastic -> motivates GARCH below.
ArchTest(r)
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: r
## Chi-squared = 70.626, df = 12, p-value = 0.0000000002445
GARCH Forecast
library(rugarch)
## Zorunlu paket yükleniyor: parallel
##
## Attaching package: 'rugarch'
## The following object is masked from 'package:purrr':
##
## reduce
## The following object is masked from 'package:stats':
##
## sigma
# AR(3) mean model with sGARCH(1,1) conditional variance, Gaussian innovations.
# NOTE(review): fitted on the raw `train` series, not the transformed `trans`
# used for the ARIMA models -- confirm this is intentional.
spec <- ugarchspec(
variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
mean.model = list(armaOrder = c(3, 0), include.mean = TRUE),
distribution.model = "norm")
garch_model <- ugarchfit(spec, data = train)
## Warning in .makefitmodel(garchmodel = "sGARCH", f = .sgarchLLH, T = T, m = m, :
## rugarch-->warning: failed to invert hessian
# NOTE(review): the failed hessian inversion above means the reported standard
# errors of this fit are unreliable.
x <- residuals(garch_model)
# Residual normality check; strongly rejected (p ~ 0).
jarque.bera.test(x)
##
## Jarque Bera Test
##
## data: x
## X-squared = 106635, df = 2, p-value < 0.00000000000000022
# 12-step-ahead GARCH forecast ("son" = Turkish for "last/final").
son <- ugarchforecast(garch_model,n.ahead=12)
library(TSstudio)
gf <- fitted(garch_model)
# Keep only observations 300..405 of the fitted values so the plot focuses on
# the recent years around the forecast origin.
last <- gf[300:405]
# NOTE(review): xts_to_ts is called with frequency/start = NULL -- presumably
# the defaults recover the monthly index; verify the resulting time base.
last_ts <- xts_to_ts(last, frequency = NULL, start = NULL)
plot(son,which=1)
# Overlay the actual test values (red, dashed) and the fitted tail (black);
# the vertical line marks the forecast origin.
lines(test,col="red",lty=2 )
lines(last_ts,col="black")
abline(v=2022.700,col="red")

# Point forecasts as a plain numeric vector for accuracy computation.
forecasted_values_garch <- as.numeric(son@forecast$seriesFor)
# Accuracy measures (ME, RMSE, MAE, MAPE, MASE) for a forecast against actuals.
#
# Args:
#   forecast: numeric vector of forecast values.
#   actual:   numeric vector of realized values, same length as `forecast`.
#   insample: numeric vector of in-sample (training) observations; used only
#             to scale the MASE denominator.
# Returns: one-row data.frame with ME, RMSE, MAE, MAPE (percent) and MASE.
# Note: residuals are forecast - actual (original sign convention preserved).
#
# Fix: the original built the naive benchmark with stats::lag() on `insample`
# and subtracted it from `actual`, recycling a length-405 vector against a
# length-12 one (this produced the recycling warning at the call site, and a
# meaningless MASE). The MASE denominator is now the standard in-sample
# one-step naive MAE, mean(|diff(insample)|).
accuracy_g <- function(forecast, actual, insample) {
residuals <- forecast - actual
# Mean absolute error of the one-step naive forecast over the training data.
naive_mae <- mean(abs(diff(insample)), na.rm = TRUE)
return(data.frame(
ME = mean(residuals, na.rm = TRUE),
RMSE = sqrt(mean(residuals^2, na.rm = TRUE)),
MAE = mean(abs(residuals), na.rm = TRUE),
MAPE = mean(abs(residuals / actual), na.rm = TRUE) * 100,
MASE = mean(abs(residuals), na.rm = TRUE) / naive_mae)
)
}
# Test-set accuracy: forecasts vs the 12 hold-out observations (rows 406..417).
accuracy_results_garch_test <- accuracy_g(forecasted_values_garch,df$obs[406:417], df$obs[1:405])
## Warning in actual - naive_forecast: uzun olan nesne uzunluğu kısa olan nesne
## uzunluğunun bir katı değil
# (Turkish warning above: "longer object length is not a multiple of shorter
# object length" -- it exposes a recycling bug inside accuracy_g's naive-
# forecast alignment, so the test-set MASE reported from this call is
# unreliable.)
fitted_values_garch <- fitted(garch_model)
# Training-set accuracy: fitted values vs the 405 in-sample observations.
accuracy_results_garch_train <- accuracy_g(fitted_values_garch, df$obs[1:405], df$obs[1:405])
ETS forecast
# Automatic ETS model selection ("ZZZ" lets ets() choose error/trend/season).
# Selected: ETS(M,Ad,N) -- multiplicative error, damped additive trend,
# no seasonal component.
etsmodel <- ets(train,model="ZZZ")
etsmodel
## ETS(M,Ad,N)
##
## Call:
## ets(y = train, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.7911
## beta = 0.2411
## phi = 0.8
##
## Initial states:
## l = 38388.5474
## b = 226.9602
##
## sigma: 0.0259
##
## AIC AICc BIC
## 8906.914 8907.125 8930.937
# 12-month forecast plotted with fitted values, train, and dashed test series;
# vertical line marks the forecast origin (Oct 2022 = 2022.833).
ets_forecast <- forecast(etsmodel,h=12)
clrs <- c("red", "pink", "yellow", "steelblue")
autoplot(ets_forecast) +
autolayer(ets_forecast$mean, series="Forecast") +
autolayer(fitted(etsmodel), series='Fitted') +
autolayer(train, series = 'Train') +
autolayer(test, series='Test', linetype="dashed") +
xlab("Time") + ylab("Millions of Dollars") +
guides(colour=guide_legend(title="Data series"),
fill=guide_legend(title="Prediction interval")) +
scale_color_manual(values=clrs)+ geom_vline(xintercept = 2022.833,color="black")+
theme_minimal()

# Train/test accuracy of the ETS forecasts.
accuracy(ets_forecast,test)
## ME RMSE MAE MPE MAPE MASE
## Training set 314.8915 3883.749 2383.856 0.2595838 1.807799 0.1822393
## Test set -6056.2630 7875.335 6962.084 -2.3836407 2.718762 0.5322325
## ACF1 Theil's U
## Training set 0.05181398 NA
## Test set -0.16116411 1.113304
# Residual normality strongly rejected.
jarque.bera.test(residuals(etsmodel))
##
## Jarque Bera Test
##
## data: residuals(etsmodel)
## X-squared = 569.12, df = 2, p-value < 0.00000000000000022
TBATS Forecast
# TBATS fit. Note the printed model is "BATS(...)": tbats() selected no
# trigonometric (Fourier) seasonal terms, i.e. it reduced to a BATS model with
# Box-Cox lambda ~ 0.0025 and an MA(3) error component.
tbatsmodel<-tbats(train)
tbatsmodel
## BATS(0.002, {0,3}, 1, -)
##
## Call: tbats(y = train)
##
## Parameters
## Lambda: 0.002488
## Alpha: 1.603217
## Beta: 0.007593507
## Damping Parameter: 1
## MA coefficients: -0.630339 0.534197 -0.111417
##
## Seed States:
## [,1]
## [1,] 10.676114523
## [2,] 0.008896374
## [3,] 0.000000000
## [4,] 0.000000000
## [5,] 0.000000000
## attr(,"lambda")
## [1] 0.002488298
##
## Sigma: 0.02572335
## AIC: 8896.795
# 12-month forecast plotted in the same layout as the ETS section.
tbats_forecast <- forecast(tbatsmodel,h=12)
clrs <- c("red", "pink", "yellow", "steelblue")
autoplot(tbats_forecast) +
autolayer(tbats_forecast$mean, series="Forecast") +
autolayer(fitted(tbatsmodel), series='Fitted') +
autolayer(train, series = 'Train') +
autolayer(test, series='Test', linetype="dashed") +
xlab("Time") + ylab("Millions of Dollars") +
guides(colour=guide_legend(title="Data series"),
fill=guide_legend(title="Prediction interval")) +
scale_color_manual(values=clrs)+ geom_vline(xintercept = 2022.833,color="black")+
theme_minimal()

# Test-set errors are much larger than ETS (MASE 1.41 vs 0.53).
accuracy(tbats_forecast,test)
## ME RMSE MAE MPE MAPE MASE
## Training set -211.3894 3848.676 2358.607 -0.1812168 1.773929 0.1803092
## Test set -17967.9098 20098.467 18392.950 -7.0162789 7.172916 1.4060914
## ACF1 Theil's U
## Training set 0.06244265 NA
## Test set 0.43499867 2.904998
# Residual normality strongly rejected.
jarque.bera.test(residuals(tbatsmodel))
##
## Jarque Bera Test
##
## data: residuals(tbatsmodel)
## X-squared = 592.83, df = 2, p-value < 0.00000000000000022
Prophet Forecast
library(prophet)
## Zorunlu paket yükleniyor: Rcpp
## Zorunlu paket yükleniyor: rlang
## (Output above, Turkish: "Loading required package: Rcpp / rlang".)
##
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
# Prophet needs a data frame with columns ds (date) and y (value); the date
# sequence Jan 1989 .. Sep 2022 matches the 405 training observations.
ds<-c(seq(as.Date("1989/01/01"),as.Date("2022/9/01"),by="month"))
pp<-data.frame(ds,y=as.numeric(train))
head(pp)
## ds y
## 1 1989-01-01 37513.2
## 2 1989-02-01 38561.7
## 3 1989-03-01 39724.7
## 4 1989-04-01 38664.7
## 5 1989-05-01 40909.6
## 6 1989-06-01 39780.9
m <- prophet(pp)
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Extend the date frame 12 months past the training span (the test horizon).
future<-make_future_dataframe(m,periods = 12,freq='month') #periods 12, since it's a monthly series.
tail(future,24)
## ds
## 394 2021-10-01
## 395 2021-11-01
## 396 2021-12-01
## 397 2022-01-01
## 398 2022-02-01
## 399 2022-03-01
## 400 2022-04-01
## 401 2022-05-01
## 402 2022-06-01
## 403 2022-07-01
## 404 2022-08-01
## 405 2022-09-01
## 406 2022-10-01
## 407 2022-11-01
## 408 2022-12-01
## 409 2023-01-01
## 410 2023-02-01
## 411 2023-03-01
## 412 2023-04-01
## 413 2023-05-01
## 414 2023-06-01
## 415 2023-07-01
## 416 2023-08-01
## 417 2023-09-01
# NOTE(review): this assignment shadows the forecast() function / forecast
# package name with a data frame -- a distinct name (e.g. prophet_fc) would be
# safer. The last 12 rows below are the out-of-sample forecasts.
forecast <- predict(m, future)
tail(forecast[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')],12)
## ds yhat yhat_lower yhat_upper
## 406 2022-10-01 238573.0 221813.6 255149.6
## 407 2022-11-01 238962.4 222391.5 255820.1
## 408 2022-12-01 240076.1 222632.0 256268.1
## 409 2023-01-01 240322.6 223103.9 258040.5
## 410 2023-02-01 239737.8 223323.2 255355.8
## 411 2023-03-01 243032.6 226005.6 259310.2
## 412 2023-04-01 242130.9 224820.8 258532.5
## 413 2023-05-01 243165.3 226833.0 258911.3
## 414 2023-06-01 243730.6 227006.6 260841.5
## 415 2023-07-01 243707.7 226804.4 260362.0
## 416 2023-08-01 244772.9 227578.6 262116.2
## 417 2023-09-01 245975.6 229551.5 263816.8
plot(m, forecast)+theme_minimal()

# Trend and yearly-seasonality component plots.
prophet_plot_components(m, forecast)

# Extract predicted values from the forecast
predicted_values <- forecast$yhat
# Calculate residuals
# NOTE(review): df$obs has all 417 observations, so these "residuals" mix the
# 405 in-sample fits with the 12 out-of-sample forecasts; also the name
# shadows stats::residuals().
residuals <- df$obs - predicted_values
# Normality of the Prophet residuals; rejected (p ~ 0).
jarque.bera.test(residuals)
##
## Jarque Bera Test
##
## data: residuals
## X-squared = 151.37, df = 2, p-value < 0.00000000000000022
# Out-of-sample accuracy for the Prophet model.
# Fix: the original used tail(a, -12), which DROPS the first 12 values and
# keeps the 405 in-sample predictions -- those were then compared against the
# 12-point test set (hence the huge reported errors, e.g. MAPE ~ 85%). The
# hold-out forecasts are the LAST 12 rows of the prediction frame.
a <- forecast$yhat
a <- tail(a, 12)
accuracy(a,test)
# In-sample accuracy: compare only the 405 training-period predictions with
# the training series (the original passed all 417 predictions against 405
# actuals, a length mismatch).
accuracy(head(forecast$yhat, length(train)),train)
NNETAR Forecast
# Neural-network autoregression; lambda = 0 applies a log transform before
# fitting. Fix: PI=T replaced with PI=TRUE (T is a reassignable variable,
# TRUE is the reserved constant); prediction intervals are simulated.
fit <- nnetar(train,lambda=0)
nnetar_forecast <- forecast(fit,h=12,PI=TRUE)
options(scipen = 999)
# Same plotting layout as the ETS/TBATS sections: forecast, fitted, train,
# dashed test series, vertical line at the forecast origin (Oct 2022).
clrs <- c("red", "pink", "yellow", "steelblue")
autoplot(nnetar_forecast) +
autolayer(nnetar_forecast$mean, series="Forecast") +
autolayer(fitted(fit), series='Fitted') +
autolayer(train, series = 'Train') +
autolayer(test, series='Test', linetype="dashed") +
xlab("Time") + ylab("Millions of Dollars") +
guides(colour=guide_legend(title="Data series"),
fill=guide_legend(title="Prediction interval")) +
scale_color_manual(values=clrs)+ geom_vline(xintercept = 2022.833,color="black")+
theme_minimal()
## Warning: Removed 12 rows containing missing values (`geom_line()`).

# NNETAR gives the best test-set MASE (0.40) among the competing models.
accuracy(nnetar_forecast,test)
## ME RMSE MAE MPE MAPE MASE
## Training set 45.95199 3964.377 2434.613 -0.03504184 1.775753 0.1861196
## Test set 2010.17502 6879.788 5266.111 0.76196600 2.036234 0.4025800
## ACF1 Theil's U
## Training set 0.03863883 NA
## Test set 0.23861275 0.976854
# Residual normality strongly rejected.
sef <- as.vector(residuals(fit))
jarque.bera.test(sef)
##
## Jarque Bera Test
##
## data: sef
## X-squared = 11932, df = 2, p-value < 0.00000000000000022